from scrapy.spiders import Spider
from scrapy.selector import Selector

from dirbot.items import Website


class DmozSpider(Spider):
    """Spider that turns every anchor on the start page into a ``Website`` item."""

    name = "dmoz"
    # NOTE(review): allowed_domains names dmoz.org while the only start URL
    # is on baidu.com -- confirm which one is intended.
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.baidu.com",

    ]

    def parse(self, response):
        """Build one ``Website`` item per ``<a>`` element in the response.

        Args:
            response: The downloaded page; it is queried via
                ``response.xpath``.

        Returns:
            A list of ``Website`` items. ``name`` and ``description`` both
            hold the anchor's first text node (an empty string when the
            anchor has none); ``url`` holds the ``href`` attribute, or
            ``None`` when the anchor has no ``href``.
        """
        items = []

        for site in response.xpath("//a"):
            # Evaluate the text query once and reuse it for both fields.
            # The explicit default replaces the old ``unicode(None)``
            # coercion, which stored the literal string "None" for anchors
            # without a text node (and relied on a Python 2-only builtin).
            text = site.xpath("text()").extract_first(default=u"")

            item = Website()
            item['name'] = text
            item['url'] = site.xpath('@href').extract_first()
            item['description'] = text
            items.append(item)

        # Function-call form prints the same output on Python 2 and Python 3.
        print("over")
        return items
